###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file measures candidates support by agreement x consistency for levels of knowledege and crosspressure
########################################################

#dirs="~/Dropbox/replication0/"
#dirs should be set here or in runR.R

rm(list=ls()[which(ls()!='dirs')])
library(ggplot2)
library(stringr)
library(glmnet)
library(xtable)
# messy function to reorder by some estimate value
lableOrder=function(xmat,labels,label.groups,omits,o.column){

	# denote which label is to be omitted on the label
	for(i in 1:length(omits)){
		labels[which(labels==omits[i])]=paste('omit',labels[which(labels==omits[i])],sep='_')
	}

	# break groups into levels
	un_group=unique(label.groups)

	# this is the item to sort on, typically global or independent
	xm=xmat[,o.column]

	# vector which will contain row order
	xo=1:length(xm)

	# rearranging roworder within level
	for(j in 1:length(un_group)){
		ix=which(label.groups==un_group[j])
		if(length(ix)>2){
			ix=ix[!grepl(labels[ix],pattern='omit')]
			xo[ix]=xo[ix][order(xm[ix])]
		}
	}
	return(xmat[xo,])
}

reOrder=function(x,o){
	ix=array(NA,nrow(x))
	for(i in 1:length(o)){
		ix[i]=which(x$iv_order==o[i])
	}
	return(x[ix,])
}

setwd(dirs)
load("data/cces_stacked_unmatched.Rdata")

###########################################
###First, need to stack based on candidates, not just candidate pairs (and also get text out for labels later)

#This has leaners as independents, which is incorrect
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)


load('data/data_matrix_scored.Rdata')
candidate_matrix=data_matrix

###########################################
###Then, produce models w/ Clustered SEs

#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
clse.f <- function(dat,fm, cluster){
 require(sandwich)
 require(lmtest)
 not <- attr(fm$model,"na.action")
if( ! is.null(not)){
  cluster <- cluster[-not]
    dat <- dat[-not,]
}
 with(dat,{
 M <- length(unique(cluster))
 N <- length(cluster)
 K <- fm$rank #doF
 #dfc <- (M/(M-1))*((N-1)/(N-K))
 dfc <- ((N-1)/(N-K))
 # estfun is :: residuals(fm) * X
 # summing estimating function to the cluster || i.e., summing residuals to the cluster level  over units in cluster
 uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
 vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
 coeftest(fm, vcovCL)
 }
 )
}

############################################################
############################################################
###Now for Primary Elections (by PID)

vote <- subset(candidate_matrix,candidate_matrix$conjoints==3|candidate_matrix$conjoints==8)
vote$pty <- ifelse(vote$conjoints==3,0,1)
vote$screen2[which(is.na(vote$screen2))]=-1

#covars=vote[,c(129:245,248:293)]
#na.vec=array(NA,ncol(covars))
#for(j in 1:length(na.vec)){
	#na.vec[j]=length(which(is.na(covars[,j])))
#}
#covars[which(is.na(covars[,which(na.vec>0)])),which(na.vec>0)]=-1
#e0=c()
#for(j in c(129,132:245,248:293)){
	#vote[,j]=as.factor(as.character(vote[,j]))
	#if(length(levels(vote[,j]))<2){
	#	e0=c(e0,j)
	#}
#}

vote$know_senate2=gsub(vote$know_senate2,pattern='Not Asked',replace='Never Heard of Person' )
vote$know_senate1=gsub(vote$know_senate1,pattern='Not Asked' ,replace='Never Heard of Person' )
vote$know_gov=gsub(vote$know_gov,pattern='Not Asked' ,replace='Never Heard of Person')
vote$know_house=gsub(vote$know_house,pattern='Not Asked',replace='Never Heard of Person')


#factor::
lst=c('milstat_1','milstat_2','milstat_3','milstat_4','milstat_5','trans',
'healthins_1','healthins_2','healthins_3','healthins_4','healthins_5','healthins_6',
'media_blog','media_tv','media_news','media_radio','media_social','media_none',
'us_senate_majority','know_gov','know_senate1','know_senate2','know_house','party_gov','party_senate1',
'party_senate2','party_house','immi_policy9','state','job','sexuality','healthins2','media_natlnews','media_printnews','socmedia_polstory1',
'socmedia_polcomment','socmedia_polstory2','socmedia_polfollow','socmedia_polstory3',
'us_house_majority','pre_pid7')

for(j in lst){
	vote[,j]=as.factor(as.character(vote[,j]))
	if(length(levels(vote[,j]))==2){
		vote[,j]=as.numeric(vote[,j])-1
	}
}

allComboSample=function(row.vec=NULL,cov.list=cov.list,party=NULL,issue.list=NULL,party.list=NULL){
	nums=129 #3*16+7*15; order shouldn't matter so 129 is the permutation
	row.vec[,cov.list]=0
	row.mat=as.data.frame(matrix(NA,nums,ncol(row.vec)))
	names(row.mat)=names(row.vec)
	e0=0
	if(party=='R'){
		for(i in 1:(length(cov.list)-1)){
			for(j in (i+1):length(cov.list)){
				if(issue.list[i]!=issue.list[j] & party.list[i]!='D' & party.list[j]!='D'){
					e0=e0+1
					row.mat[e0,]=row.vec
					row.mat[e0,c(cov.list[i],cov.list[j])]=1
				} # if control
			} # inner loop j
		} # outer loop i
	} # party is R
	e0=0
	if(party=='D'){
		for(i in 1:(length(cov.list)-1)){
			for(j in (i+1):length(cov.list)){
				if(issue.list[i]!=issue.list[j] & party.list[i]!='R' & party.list[j]!='R'){
					e0=e0+1
					row.mat[e0,]=row.vec
					row.mat[e0,c(cov.list[i],cov.list[j])]=1
				} # if control
			} # inner loop j
		} # outer loop i
	} # party is D
	return(row.mat)
}

###########################
#STEP 2: regression model
###########################

cvs=c('dv_choice','pty','g_female','re_black','re_hispanic','r_catholic','r_evangelical','r_protestant',
'o_ceo','o_citycouncil','o_factoryforeman','o_farmer','o_usarmymajor',
'o_politicalstaffer','o_smallbizowner','o_stateleg','o_teacher',
'p_compassionate','p_empathetic','p_inspiring','p_intelligent','p_knowledgeable','p_moral','p_strongleader',
'e_business','e_christian','e_civilrights','e_energy','e_environment','e_guncontrol','e_gunrights',
'e_laborunions','e_reproductive','e_taxreform','e_teaparty','e_veterans',
'rec_refuse','rec_secure','rec_stand','rec_work',
#
'age','educ','edloan','married','reg','vote12','vote12_obama','primary','state','employ','job','born_again',
'milstat_1','milstat_2','milstat_3','milstat_4','milstat_5','unionhh','faminc','investor','sexuality','trans',
'healthins_1','healthins_2','healthins_3','healthins_4','healthins_5','healthins_6','healthins2','newsint',
'media_blog','media_tv','media_news','media_radio','media_social','media_none','media_natlnews','media_printnews',
'socmedia_polstory1','socmedia_polcomment','socmedia_polstory2','socmedia_polfollow','socmedia_polstory3',
'mip_guns','mip_abortion','mip_taxes','mip_immigration','mip_deficit','mip_defense','mip_socsec','mip_env',
'mip_jobs','mip_crime','mip_natlsecurity','mip_race','mip_healthcare','mip_gays','mip_govtcorruption','mip_religion',
'us_house_majority','us_senate_majority','know_gov','know_senate1','know_senate2','know_house','dem_place','rep_place',
'self_place','guns_policy1','guns_policy2','guns_policy3','guns_policy4','immi_policy1','immi_policy2','immi_policy3',
'immi_policy4','immi_policy5','immi_policy6','immi_policy7','immi_policy8','immi_policy9','abrt_policy1','abrt_policy2',
'abrt_policy3','abrt_policy4','abrt_policy5','abrt_policy6','envs_policy1','envs_policy2','envs_policy3','envs_policy4',
'crme_policy1','crme_policy2','crme_policy3','crme_policy4','gays_policy1','budg_policy1','budg_policy2','budg_policy3',
'roll_garland','roll_tpp_act','roll_trd_adj','roll_usa_fre','roll_iransct','roll_educrfr','roll_infrast','roll_medicar',
'roll_rpl_aca','roll_minwage','screen1','screen2','pre_pid7','female','white','black','hisps','single','retired',
'unemploy','religion_none','religion_catholic','religion_protestant','religion_evangelical','queer_identity',
'health_insured','aca_insured','sophistication','taxs_policy1','taxs_policy2','need_policy1','need_policy2',
'dfns_policy1','dfns_policy2',
#
'i_raisetaxes','i_cuttaxes','i_lgbt','i_marriage','i_drilling','i_need','i_govabuse','i_righttochoose',
'i_gunrights','i_unfairtrade','i_unbornlives','i_citizenship','i_reducemilitary','i_policing',
'i_co2emissions','i_bordersecurity','i_guncontrol','i_strengthenmilitary','i_criminals'
#
)

isit.factor=array(0,length(cvs))
for(j in 1:length(cvs)){
	isit.factor[j]=as.numeric(is.factor(vote[,cvs[j]]))
}

# state might help, but will exclude all for now ...
xcV=cvs[which(isit.factor==1)]
#[1] "state"               "job"                 "sexuality"           "trans"               "healthins2"
#[6] "media_natlnews"      "media_printnews"     "socmedia_polstory1"  "socmedia_polcomment" "socmedia_polstory2"
#[11] "socmedia_polfollow"  "socmedia_polstory3"  "us_house_majority"   "us_senate_majority"  "know_gov"
#[16] "know_senate1"        "know_senate2"        "know_house"          "pre_pid7"

cL=c('age','educ','edloan','married','reg','vote12','vote12_obama','primary','state','employ','job','born_again',
'milstat_1','milstat_2','milstat_3','milstat_4','milstat_5','unionhh','faminc','investor','sexuality','trans',
'healthins_1','healthins_2','healthins_3','healthins_4','healthins_5','healthins_6','healthins2','newsint',
'media_blog','media_tv','media_news','media_radio','media_social','media_none','media_natlnews','media_printnews',
'socmedia_polstory1','socmedia_polcomment','socmedia_polstory2','socmedia_polfollow','socmedia_polstory3',
'mip_guns','mip_abortion','mip_taxes','mip_immigration','mip_deficit','mip_defense','mip_socsec','mip_env',
'mip_jobs','mip_crime','mip_natlsecurity','mip_race','mip_healthcare','mip_gays','mip_govtcorruption','mip_religion',
'us_house_majority','us_senate_majority','know_gov','know_senate1','know_senate2','know_house','dem_place','rep_place',
'self_place','guns_policy1','guns_policy2','guns_policy3','guns_policy4','immi_policy1','immi_policy2','immi_policy3',
'immi_policy4','immi_policy5','immi_policy6','immi_policy7','immi_policy8','immi_policy9','abrt_policy1','abrt_policy2',
'abrt_policy3','abrt_policy4','abrt_policy5','abrt_policy6','envs_policy1','envs_policy2','envs_policy3','envs_policy4',
'crme_policy1','crme_policy2','crme_policy3','crme_policy4','gays_policy1','budg_policy1','budg_policy2','budg_policy3',
'roll_garland','roll_tpp_act','roll_trd_adj','roll_usa_fre','roll_iransct','roll_educrfr','roll_infrast','roll_medicar',
'roll_rpl_aca','roll_minwage','screen1','screen2','pre_pid7','female','white','black','hisps','single','retired',
'unemploy','religion_none','religion_catholic','religion_protestant','religion_evangelical','queer_identity',
'health_insured','aca_insured','sophistication','taxs_policy1','taxs_policy2','need_policy1','need_policy2',
'dfns_policy1','dfns_policy2')
cR=c(
	'i_righttochoose','i_unbornlives','i_guncontrol','i_gunrights','i_lgbt','i_marriage',
	'i_raisetaxes','i_cuttaxes','i_need','i_govabuse','i_freetrade','i_unfairtrade',
	'i_citizenship','i_bordersecurity','i_reducemilitary','i_strengthenmilitary','i_policing','i_criminals',
	'i_co2emissions','i_drilling'
)

for(i in 1:length(xcV)){
	cL=cL[-c(which(cL==xcV[i]))]
	cvs=cvs[-c(which(cvs==xcV[i]))]
}
rm(xcV)

# producing data interactions
e0=0
ixvote=as.data.frame(matrix(NA,nrow(vote),length(cL)*length(cR)))
ixname=array(NA,length(cL)*length(cR))
for(i in 1:length(cL)){
	for(j in 1:length(cR)){
		e0=e0+1
		ixvote[,e0]=vote[,cL[i]]*vote[,cR[j]]
		ixname[e0]=paste(cL[i],cR[j],sep=':')
	}
}
names(ixvote)=ixname
vote=cbind(vote,ixvote)


id=which(vote$pid==-1)
ir=which(vote$pid==1)

# kinder and kalmoe + identify the 20-30% who score highest on ideological knowledge/consistency
vote$know_gov=k_gov=as.character(vote$know_gov)
vote$know_senate1=k_sen1=as.character(vote$know_senate1)
vote$know_senate2=k_sen2=as.character(vote$know_senate2)

k_gov[which(vote$know_gov=='Republican')]=1
k_gov[grep(vote$know_gov,pattern='Independent')]=0
k_gov[which(vote$know_gov=='Democrat')]=-1
k_gov[!grepl(k_gov,pattern='[0,1]')]=-2

k_sen1[which(vote$know_senate1=='Republican')]=1
k_sen1[grep(vote$know_senate1,pattern='Independent')]=0
k_sen1[which(vote$know_senate1=='Democrat')]=-1
k_sen1[!grepl(k_sen1,pattern='[0,1]')]=-2

k_sen2[which(vote$know_senate2=='Republican')]=1
k_sen2[grep(vote$know_senate2,pattern='Independent')]=0
k_sen2[which(vote$know_senate2=='Democrat')]=-1
k_sen2[!grepl(k_sen2,pattern='[0,1]')]=-2

k_gov=as.numeric(k_gov)
k_sen1=as.numeric(k_sen1)
k_sen2=as.numeric(k_sen2)

sen2=sign(tapply(k_sen2[which(k_sen2> -2)],vote$state[which(k_sen2> -2)],mean,na.rm=T)) # sanders and angus king
sen2[which(names(sen2)=='Wyoming')]=1
sen2[which(names(sen2)=='Vermont')]=0
sen2[which(names(sen2)=='Maine')]=0
sen1=sign(tapply(k_sen1[which(k_sen1> -2)],vote$state[which(k_sen1> -2)],mean,na.rm=T))
gov=sign(tapply(k_gov[which(k_gov> -2)],vote$state[which(k_gov> -2)],mean,na.rm=T))

know_gov=know_senate2=know_senate1=array('Democrat',nrow(vote))
vote$state=as.character(vote$state)
un_state=unique(vote$state)

for(j in 1:length(un_state)){
	if(un_state[j]!="District of Columbia"){

		ix=which(vote$state==un_state[j])

		if(gov[which(names(gov)==un_state[j])]==1){
			know_gov[ix]='Republican'
		}
		if(gov[which(names(gov)==un_state[j])]==0){
			know_gov[ix]='Other Party / Independent'
		}

		if(sen1[which(names(sen1)==un_state[j])]==1){
			know_senate1[ix]='Republican'
		}
		if(sen1[which(names(sen1)==un_state[j])]==0){
			know_senate1[ix]='Other Party / Independent'
		}

		if(sen2[which(names(sen2)==un_state[j])]==1){
			know_senate2[ix]='Republican'
		}
		if(sen2[which(names(sen2)==un_state[j])]==0){
			know_senate2[ix]='Other Party / Independent'
		}

	}
}


# knowledge battery
X=cbind(as.numeric(vote$know_senate1==know_senate1), 	# know party of sen1
as.numeric(vote$know_senate2==know_senate2),					# know party of sen2
as.numeric(vote$know_gov==know_gov),									# know party of gov
as.numeric(vote$us_house_majority=='Republicans'),		# know house majority
as.numeric(vote$us_senate_majority=='Republicans'),		# know senate majority
as.numeric(vote$self_place_attempt==1),								# attempt self place
as.numeric(vote$dem_place<vote$rep_place))		# place dems left of reps

colnames(X)=c('know_sen1','know_sen2','know_gov','know_house_maj','know_sen_maj','know_any_self_place','know_D_left_R')
vote$know_senate1=X[,which(colnames(X)=="know_sen1")]
vote$know_senate2=X[,which(colnames(X)=="know_sen2")]
vote$know_gov=X[,which(colnames(X)=="know_gov")]
vote$us_house_majority=X[,which(colnames(X)=="know_house_maj")]
vote$us_senate_majority=X[,which(colnames(X)=="know_sen_maj")]
vote$self_place_attempt=X[,which(colnames(X)=="know_any_self_place")]
vote$know_D_left_R=X[,which(colnames(X)=="know_D_left_R")]


# produce unique models by PID
reps <- subset(vote, vote$pid3==1)
dems <- subset(vote, vote$pid3==-1)
inds <- subset(vote, vote$pid3==0)

#dv_choice is the vote choice variable
# pty is included for independents here, otherwise of course NAs out.

cvs=c(cvs,ixname)
rX=reps[,c(cvs)]
dX=dems[,c(cvs)]

do.baseline.ols=T
if(do.baseline.ols==T){
	if(!file.exists('data/agreement_consistency_imputation_OLS.Rdata')){
		# baseline models
		primary_elec_reps <- lm(formula(rX),data=rX)
		primary_elec_dems <- lm(formula(dX),data=dX)
		# downside is have to replicate the interaction in the simulations in the same way

		######## remove na coefficients and repeat
		insR.coef=c('dv_choice',gsub(names(which(abs(primary_elec_reps$coef)<20 & !is.na(primary_elec_reps$coef))),pattern='`',replace='')[-c(1)])
		insD.coef=c('dv_choice',gsub(names(which(abs(primary_elec_dems$coef)<20 & !is.na(primary_elec_dems$coef))),pattern='`',replace='')[-c(1)])
		rX=rX[,insR.coef]
		dX=dX[,insD.coef]
		primary_elec_reps <- lm(formula(rX),data=rX)
		primary_elec_dems <- lm(formula(dX),data=dX)

		save(insR.coef,insD.coef,primary_elec_reps,primary_elec_dems,file='data/agreement_consistency_imputation_OLS.Rdata')
	} else{
		load('data/agreement_consistency_imputation_OLS.Rdata')
	}

	###########################
	#STEP 3: validated via prediction
	###########################
	# rough validation:
	#set.seed(1005)
	#r0=sample(replace=F,1:nrow(rX),size=round(nrow(rX)/2))
	#d0=sample(replace=F,1:nrow(dX),size=round(nrow(dX)/2))
	#primary_elec_reps_0 <- lm(formula(rX),data=rX[r0,])
	#primary_elec_dems_0 <- lm(formula(dX),data=dX[d0,])
	#predict
	yR_1=predict(object=primary_elec_reps, newdata=rX)
	yD_1=predict(object=primary_elec_dems, newdata=dX)

	# overfit model
	mean(rX$dv_choice[which(reps$candidate=='A')][which(yR_1[which(reps$candidate=='A')]>yR_1[which(reps$candidate=='B')])])
	#[1] 0.9090402
	mean(dX$dv_choice[which(dems$candidate=='A')][which(yD_1[which(dems$candidate=='A')]>yD_1[which(dems$candidate=='B')])])
	#[1] 0.8555895

	# rough validation [left out model]:
	set.seed(1005)
	aR=sample(which(reps$candidate=='A'),size=length(which(reps$candidate=='A'))/2,replace=F)
	aD=sample(which(dems$candidate=='A'),size=length(which(dems$candidate=='A'))/2,replace=F)
	bR=which(reps$candidate=='B')[aR]
	bD=which(dems$candidate=='B')[aD]

	r0=c(aR,bR)
	d0=c(aD,bD)

	if(!file.exists('data/agreement_consistency_imputation_OLS_0.Rdata')){
		# baseline models
		primary_elec_reps_0 <- lm(formula(rX),data=rX[r0,])
		primary_elec_dems_0 <- lm(formula(dX),data=dX[d0,])
		# downside is have to replicate the interaction in the simulations in the same way
		save(primary_elec_reps_0,primary_elec_dems_0,file='data/agreement_consistency_imputation_OLS_0.Rdata')
	} else{
		load('data/agreement_consistency_imputation_OLS_0.Rdata')
	}

	#predict

	fR=primary_elec_reps_0$fitted
	fD=primary_elec_dems_0$fitted

	yR_2=predict(object=primary_elec_reps_0, newdata=rX[c(r0),])
	yD_2=predict(object=primary_elec_dems_0, newdata=dX[c(d0),])

	mean(rX$dv_choice[r0][which(reps$candidate[c(r0)]=='A')][which(yR_2[which(reps$candidate[c(r0)]=='A')]>yR_2[which(reps$candidate[c(r0)]=='B')])])
	mean(dX$dv_choice[d0][which(dems$candidate[c(d0)]=='A')][which(yD_2[which(dems$candidate[c(d0)]=='A')]>yD_2[which(dems$candidate[c(d0)]=='B')])])

	yR_2=predict(object=primary_elec_reps_0, newdata=rX[-c(r0),])
	yD_2=predict(object=primary_elec_dems_0, newdata=dX[-c(d0),])
	# proper fit model
	mean(rX$dv_choice[-c(r0)][which(reps$candidate[-c(r0)]=='A')][which(yR_2[which(reps$candidate[-c(r0)]=='A')]>yR_2[which(reps$candidate[-c(r0)]=='B')])])
	#[1] 0.6857798
	mean(dX$dv_choice[-c(d0)][which(dems$candidate[-c(d0)]=='A')][which(yD_2[which(dems$candidate[-c(d0)]=='A')]>yD_2[which(dems$candidate[-c(d0)]=='B')])])
	#[1] 0.6867637
	## >= 69% accuracy on OLS model

}

###################################
###################################
# alternative using ridge but takes a while....
###################################
###################################
do.ridge=T
long.run=F
if(do.ridge==T){
	#cv to assess prediction .....
	#R.first
	lambda=seq(from=.917,to=0.0002,by=-0.05)
	# b. set vector for lasso v. ridge weight; alpha = 1 is lasso; alpha = 0 is ridge; mixture is elastic
	alpha=seq(from=0,to=.3,by=0.1)
	# c. set data
	y=as.matrix(rX$dv_choice) #as.matrix(ads_mean[1:150])
	x=as.matrix(rX[,-c(1)]) #as.matrix(Za[1:150,])
	# d. record sumMSE; leaving one out for each alpha,lambda value
	#ix=which(colSums(x)!=0)
	#w=w[,ix]

	set.seed(1005)
	sumMSE=matrix(NA,length(alpha),length(lambda))

	if(long.run==T){
		for(i in 1:length(alpha)){
			oo=cv.glmnet(nfolds=10,x=x, y=y, family=c("gaussian"),nlambda=length(lambda),
				alpha=alpha[i],lambda=lambda)
			if(length(oo$cvm)==length(sumMSE[i,])){
				sumMSE[i,]=oo$cvm
			}
		}

		for(i in 1:length(alpha)){
			if(length(which(sumMSE[i,]==min(sumMSE)))>0){break}
		}

		#lambda[which(sumMSE[i,]==min(sumMSE))]

		alphaR=alpha[i]
		#[1] 0
		lambdaR=lambda[which(sumMSE[i,]==min(sumMSE))]
		#[1] 0.117
	}
	alphaR=0
	lambdaR=0.117

	#predicts_new=oo$a0+x%*%oo$beta
	#cor.test(as.numeric(predicts_new[,1]>0.5),y[,1])
	modR=glmnet(x=x, y=y, family=c("gaussian"),nlambda=length(lambdaR),
		alpha=alphaR,lambda=lambdaR)

	#D.second
	lambda=seq(from=.917,to=0.0002,by=-0.05)
	# b. set vector for lasso v. ridge weight; alpha = 1 is lasso; alpha = 0 is ridge; mixture is elastic
	alpha=seq(from=0,to=.3,by=0.1)
	# c. set data
	y=as.matrix(dX$dv_choice) #as.matrix(ads_mean[1:150])
	x=as.matrix(dX[,-c(1)]) #as.matrix(Za[1:150,])
	# d. record sumMSE; leaving one out for each alpha,lambda value
	#ix=which(colSums(x)!=0)
	#w=w[,ix]

	set.seed(1005)
	sumMSE=matrix(NA,length(alpha),length(lambda))

	if(long.run==T){
		for(i in 1:length(alpha)){
			oo=cv.glmnet(nfolds=10,x=x, y=y, family=c("gaussian"),nlambda=length(lambda),
				alpha=alpha[i],lambda=lambda)
			if(length(oo$cvm)==length(sumMSE[i,])){
				sumMSE[i,]=oo$cvm
			}
		}

		for(i in 1:length(alpha)){
			if(length(which(sumMSE[i,]==min(sumMSE)))>0){break}
		}

		#lambda[which(sumMSE[i,]==min(sumMSE))]

		alphaD=alpha[i]
		#[1] 0
		lambdaD=lambda[which(sumMSE[i,]==min(sumMSE))]
		#[1]  0.217
	}

	alphaD=0
	lambdaD=.217

	#predicts_new=oo$a0+x%*%oo$beta
	#cor.test(as.numeric(predicts_new[,1]>0.5),y[,1])
	modD=glmnet(x=x, y=y, family=c("gaussian"),nlambda=length(lambdaD),
		alpha=alphaD,lambda=lambdaD)
	#
	# test overfit model

	yR_0=(modR$a0+as.matrix(rX[,-c(1)])%*%modR$beta)[,1]
	yD_0=(modD$a0+as.matrix(dX[,-c(1)])%*%modD$beta)[,1]

	# overfit model
	mean(rX$dv_choice[which(reps$candidate=='A')][which(yR_0[which(reps$candidate=='A')]>yR_0[which(reps$candidate=='B')])])
	#[1] 0.8578595
	mean(dX$dv_choice[which(dems$candidate=='A')][which(yD_0[which(dems$candidate=='A')]>yD_0[which(dems$candidate=='B')])])
	#[1] 0.8013018

	# rough validation [left out model]:
	set.seed(1005)
	aR=sample(which(reps$candidate=='A'),size=length(which(reps$candidate=='A'))/2,replace=F)
	aD=sample(which(dems$candidate=='A'),size=length(which(dems$candidate=='A'))/2,replace=F)
	bR=which(reps$candidate=='B')[aR]
	bD=which(dems$candidate=='B')[aD]

	r0=c(aR,bR)
	d0=c(aD,bD)

	#cv to assess prediction .....
	#R.first
	lambda=seq(from=.917,to=0.0002,by=-0.05)
	# b. set vector for lasso v. ridge weight; alpha = 1 is lasso; alpha = 0 is ridge; mixture is elastic
	alpha=seq(from=0,to=.3,by=0.1)
	# c. set data
	y=as.matrix(rX$dv_choice)[r0] #as.matrix(ads_mean[1:150])
	x=as.matrix(rX[r0,-c(1)]) #as.matrix(Za[1:150,])
	# d. record sumMSE; leaving one out for each alpha,lambda value
	#ix=which(colSums(x)!=0)
	#w=w[,ix]

	set.seed(1005)
	sumMSE=matrix(NA,length(alpha),length(lambda))

	if(long.run==T){
		for(i in 1:length(alpha)){
			oo=cv.glmnet(nfolds=10,x=x, y=y, family=c("gaussian"),nlambda=length(lambda),
				alpha=alpha[i],lambda=lambda)
			if(length(oo$cvm)==length(sumMSE[i,])){
				sumMSE[i,]=oo$cvm
			}
		}

		for(i in 1:length(alpha)){
			if(length(which(sumMSE[i,]==min(sumMSE)))>0){break}
		}

		#lambda[which(sumMSE[i,]==min(sumMSE))]

		alphaR=alpha[i]
		#[1] 0
		lambdaR=lambda[which(sumMSE[i,]==min(sumMSE))]
		#[1] 0.417
	}

	alphaR=0
	lambdaR=.417

	#predicts_new=oo$a0+x%*%oo$beta
	#cor.test(as.numeric(predicts_new[,1]>0.5),y[,1])
	modR=glmnet(x=as.matrix(rX[c(r0),-c(1)]), y=as.matrix(rX$dv_choice)[c(r0)], family=c("gaussian"),nlambda=length(lambdaR),
		alpha=alphaR,lambda=lambdaR)

	#D.second
	lambda=seq(from=.917,to=0.0002,by=-0.05)
	# b. set vector for lasso v. ridge weight; alpha = 1 is lasso; alpha = 0 is ridge; mixture is elastic
	alpha=seq(from=0,to=.3,by=0.1)
	# c. set data
	y=as.matrix(dX$dv_choice)[d0] #as.matrix(ads_mean[1:150])
	x=as.matrix(dX[d0,-c(1)]) #as.matrix(Za[1:150,])
	# d. record sumMSE; leaving one out for each alpha,lambda value
	#ix=which(colSums(x)!=0)
	#w=w[,ix]

	set.seed(1005)
	sumMSE=matrix(NA,length(alpha),length(lambda))

	if(long.run==T){
		for(i in 1:length(alpha)){
			oo=cv.glmnet(nfolds=10,x=x, y=y, family=c("gaussian"),nlambda=length(lambda),
				alpha=alpha[i],lambda=lambda)
			if(length(oo$cvm)==length(sumMSE[i,])){
				sumMSE[i,]=oo$cvm
			}
		}

		for(i in 1:length(alpha)){
			if(length(which(sumMSE[i,]==min(sumMSE)))>0){break}
		}

		#lambda[which(sumMSE[i,]==min(sumMSE))]

		alphaD=alpha[i]
		#[1] 0
		lambdaD=lambda[which(sumMSE[i,]==min(sumMSE))]
		#[1]  0.767
	}

	alphaD=0
	lambdaD=.767

	#predicts_new=oo$a0+x%*%oo$beta
	#cor.test(as.numeric(predicts_new[,1]>0.5),y[,1])
	modD=glmnet(x=as.matrix(dX[c(d0),-c(1)]), y=as.matrix(dX$dv_choice)[c(d0)], family=c("gaussian"),nlambda=length(lambdaD),
		alpha=alphaD,lambda=lambdaD)
	#
	# test overfit model

	yR_00=(modR$a0+as.matrix(rX[-c(r0),-c(1)])%*%modR$beta)[,1]
	yD_00=(modD$a0+as.matrix(dX[-c(d0),-c(1)])%*%modD$beta)[,1]

	# overfit model
	mean(rX$dv_choice[-c(r0)][which(reps$candidate[-c(r0)]=='A')][which(yR_00[which(reps$candidate[-c(r0)]=='A')]>yR_00[which(reps$candidate[-c(r0)]=='B')])])
	#[1] 0.7075366
	mean(dX$dv_choice[-c(d0)][which(dems$candidate[-c(d0)]=='A')][which(yD_00[which(dems$candidate[-c(d0)]=='A')]>yD_00[which(dems$candidate[-c(d0)]=='B')])])
	#[1] 0.6923631

} #end if do.ridge
###################################
###################################
# END RIDGE
###################################
###################################

###########################
#STEP 4: respondent level data for predictions
###########################

###################################
###################################
# RESPONDENT LEVEL SCORES: reduce reps_synth to unique respondents
###################################
###################################



rid=unique(reps$respondent)
reps_synth=matrix(NA,length(rid),ncol(reps))
reps_synth=as.data.frame(reps_synth)
names(reps_synth)=names(reps)
for(i in 1:length(rid)){
	iq=which(rid[i]==reps$respondent)[1]
	reps_synth[i,]=reps[i,]
}

###################################
###################################
# RESPONDENT LEVEL SCORES: reduce dems_synth to unique respondents
###################################
###################################

did=unique(dems$respondent)
dems_synth=matrix(NA,length(did),ncol(dems))
dems_synth=as.data.frame(dems_synth)
names(dems_synth)=names(dems)
for(i in 1:length(did)){
	iq=which(did[i]==dems$respondent)[1]
	dems_synth[i,]=dems[i,]
}


###########################
#STEP 5: initiate prediction
###########################

###################################
###################################
# set other candidate attributes to averages
###################################
###################################

lst2=c('pty','g_female','re_black','re_hispanic','r_catholic','r_evangelical','r_protestant',
'o_ceo','o_citycouncil','o_factoryforeman','o_farmer','o_usarmymajor','o_politicalstaffer',
'o_smallbizowner','o_stateleg','o_teacher','p_compassionate','p_empathetic','p_inspiring',
'p_intelligent','p_knowledgeable','p_moral','p_strongleader','e_business','e_christian',
'e_civilrights','e_energy','e_environment','e_guncontrol','e_gunrights','e_laborunions',
'e_reproductive','e_taxreform','e_teaparty','e_veterans','rec_refuse','rec_secure','rec_stand','rec_work')

for(j in lst2){
	reps_synth[,j]=mean(reps_synth[,j],na.rm=T)
}

for(j in lst2){
	dems_synth[,j]=mean(dems_synth[,j],na.rm=T)
}

###################################
###################################
# elaborates every possible candidate combination of two issues;
#  - predicts support for each respondent for each of these combinations
#  - using above saturated regression models
###################################
###################################

# experimental factors
cov.list=c(
	'i_righttochoose','i_unbornlives','i_guncontrol','i_gunrights','i_lgbt','i_marriage',
	'i_raisetaxes','i_cuttaxes','i_need','i_govabuse','i_freetrade','i_unfairtrade',
	'i_citizenship','i_bordersecurity','i_reducemilitary','i_strengthenmilitary','i_policing','i_criminals',
	'i_co2emissions','i_drilling'
)

# control list for possible issue combinations
issue.list=c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10)
party.list=c('D','R','D','R','D','R',rep(c('d','r'),7))

if(!file.exists('data/predictedSupportMatricesIns.Rdata')){
candPredictD=candPredictR=list()
candPredictR[[1]]=candPredictR[[2]]=matrix(NA,nrow(reps_synth),129)
candPredictD[[1]]=candPredictD[[2]]=matrix(NA,nrow(dems_synth),129)

# loop samples all combinations of candidates for each respondent; then produces a vector of support scores for that person
#  gets stored as a matrix when combining over all respondents, etc
e0=0;s0=0
for(k in 1:nrow(reps_synth)){
	e0=e0+1
	if(e0==100){
		print(s0+e0)
		s0=e0+s0
		e0=0
	}
	row.vec=allComboSample(row.vec=reps_synth[k,],cov.list=cov.list,party='R',issue.list=issue.list,party.list=party.list)
	nms=names(row.vec)
	for(i in 1:length(cL)){
		for(j in 1:length(cR)){
			iq=which(nms==paste(cL[i],cR[j],sep=':'))
			if(length(iq)==1){
				row.vec[,iq]=row.vec[,cL[i]]*row.vec[,cR[j]]
			}
		}
	}
	rV=row.vec[,insR.coef]

	candPredictR[[1]][k,]=predict(object=primary_elec_reps, newdata=rV)
	#if(do.ridge==T){
		rVV=row.vec[,c(rownames(modR$beta))]
		candPredictR[[2]][k,]=(modR$a0+as.matrix(rVV[,])%*%modR$beta)[,1]
	#}
}

e0=0;s0=0
for(k in 1:nrow(dems_synth)){
	e0=e0+1
	if(e0==100){
		print(s0+e0)
		s0=e0+s0
		e0=0
	}
	row.vec=allComboSample(row.vec=dems_synth[k,],cov.list=cov.list,party='D',issue.list=issue.list,party.list=party.list)
	nms=names(row.vec)
	for(i in 1:length(cL)){
		for(j in 1:length(cR)){
			iq=which(nms==paste(cL[i],cR[j],sep=':'))
			if(length(iq)==1){
				row.vec[,iq]=row.vec[,cL[i]]*row.vec[,cR[j]]
			}
		}
	}
	dV=row.vec[,insD.coef]

	candPredictD[[1]][k,]=predict(object=primary_elec_dems, newdata=dV)
	#if(do.ridge==T){
		dVV=row.vec[,c(rownames(modD$beta))]
		candPredictD[[2]][k,]=(modD$a0+as.matrix(dVV[,])%*%modD$beta)[,1]
	#}
}
save(candPredictD,candPredictR,file='data/predictedSupportMatricesIns.Rdata')
} else {
load('data/predictedSupportMatricesIns.Rdata')
if(do.ridge==T){
	eD=0
	for(j in 1:nrow(candPredictD[[1]])){
		eD=eD+cor.test(candPredictD[[1]][j,],candPredictD[[2]][j,])$est
	}
	eR=0
	for(j in 1:nrow(candPredictR[[1]])){
		eR=eR+cor.test(candPredictR[[1]][j,],candPredictR[[2]][j,])$est
	}
	eR/nrow(candPredictR[[1]])
	eD/nrow(candPredictD[[1]])
	}
} #


###########################
#STEP 6: produce aggreement and consistency scores for each combination of 129 issues for each respondent
#	- ie. 0,1,2 agreement; 0,1,2, consistency for each N-by-129
###########################

#ols regression prediction
#if(do.ridge==T){
#	candPredictR=candPredictR[[1]]
#	candPredictD=candPredictD[[1]]
#} else {

# ridge prediction is used for reasons related to better prediction and stability of prediction
candPredictR=candPredictR[[1]]
candPredictD=candPredictD[[1]]
#}

e0=0
consistent.covs.D=paired.covs.D=array(NA,129)
for(i in 1:(length(cov.list)-1)){
	for(j in (i+1):length(cov.list)){
		if(issue.list[i]!=issue.list[j] & party.list[i]!='R' & party.list[j]!='R'){
			e0=e0+1
			paired.covs.D[e0]=paste(cov.list[i],cov.list[j],sep='::')
			consistent.covs.D[e0]=length(which(tolower(c(party.list[i],party.list[j]))=='d'))
		} # if control
	} # inner loop j
} # outer loop i

e0=0
consistent.covs.R=paired.covs.R=array(NA,129)
for(i in 1:(length(cov.list)-1)){
	for(j in (i+1):length(cov.list)){
		if(issue.list[i]!=issue.list[j] & party.list[i]!='D' & party.list[j]!='D'){
			e0=e0+1
			paired.covs.R[e0]=paste(cov.list[i],cov.list[j],sep='::')
			consistent.covs.R[e0]=length(which(tolower(c(party.list[i],party.list[j]))=='r'))
		} # if control
	} # inner loop j
} # outer loop i


########################
# need an agreement measure from libcon scores + others
########################
# agreement is >=0,<=0; so 0s agree to L and R...; probably should exclude 0 for both;
# -- if so, will need to exclude in all prior analyses too ....
# agreement construction included 0 as agree for both L and R; maybe excluded from both is more accurate

library(stringr)
cov.ix=c("libcon_abrt","libcon_guns","libcon_gays","libcon_taxs","libcon_need",
	"libcon_trad","libcon_immi","libcon_dfns","libcon_crme","libcon_envs")
pty.list=as.numeric(tolower(party.list)=='r')*2-1

candAgreeR=matrix(NA,nrow(candPredictR),ncol(candPredictR))
candAgreeD=matrix(NA,nrow(candPredictD),ncol(candPredictD))

# for R
for(j in 1:129){
	i0=str_locate(paired.covs.R[j],pattern='::')
	#print(c(paired.covs.R[j],cov.list[c(grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1)),grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1)))]))
	#print(c(cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1))]],cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1  ))]]))
	#cov.list[c(grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1)),grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1)))]
	#cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1))]]
	#cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1  ))]]

	p1=pty.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1))]
	p2=pty.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1  ))]

	a0=as.numeric(p1==as.numeric(reps_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1))]]]>=0))
	a0=a0+as.numeric(p1==-as.numeric(reps_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],1,i0[1]-1))]]]<=0))
	a0=a0+as.numeric(p2==as.numeric(reps_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1  ))]]]>=0))
	a0=a0+as.numeric(p2==-as.numeric(reps_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.R[j],i0[2]+1  ))]]]<=0))
	candAgreeR[,j]=a0
	rm(a0)
}

# for D
for(j in 1:129){
	i0=str_locate(paired.covs.D[j],pattern='::')
	#print(c(paired.covs.D[j],cov.list[c(grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1)),grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1)))]))
	#print(c(cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1))]],cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1  ))]]))}
	#cov.list[c(grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1)),grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1)))]
	#cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1))]]
	#cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1  ))]]

	p1=pty.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1))]
	p2=pty.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1  ))]

	a0=as.numeric(p1==as.numeric(dems_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1))]]]>=0))
	a0=a0+as.numeric(p1==-as.numeric(dems_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],1,i0[1]-1))]]]<=0))
	a0=a0+as.numeric(p2==as.numeric(dems_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1  ))]]]>=0))
	a0=a0+as.numeric(p2==-as.numeric(dems_synth[,cov.ix[issue.list[grep(cov.list,pattern=str_sub(paired.covs.D[j],i0[2]+1  ))]]]<=0))
	candAgreeD[,j]=a0
	rm(a0)
}

########################
# assess agreement measure from libcon scores + others
########################
# entirely consistent w/ AMCE

muR=c(mean(candPredictR[,grep(paired.covs.R,pattern='i_righttochoose')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_unbornlives')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_guncontrol')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_gunrights')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_lgbt')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_marriage')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_raisetaxes')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_cuttaxes')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_need')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_govabuse')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_freetrade')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_unfairtrade')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_citizenship')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_bordersecurity')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_reducemilitary')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_strengthenmilitary')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_policing')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_criminals')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_co2emissions')]),
	mean(candPredictR[,grep(paired.covs.R,pattern='i_drilling')])
)

muD=c(mean(candPredictD[,grep(paired.covs.D,pattern='i_righttochoose')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_unbornlives')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_guncontrol')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_gunrights')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_lgbt')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_marriage')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_raisetaxes')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_cuttaxes')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_need')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_govabuse')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_freetrade')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_unfairtrade')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_citizenship')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_bordersecurity')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_reducemilitary')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_strengthenmilitary')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_policing')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_criminals')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_co2emissions')]),
	mean(candPredictD[,grep(paired.covs.D,pattern='i_drilling')])
)

cov.list[order(muD)]
cov.list[order(muR)]

########################
# assess agreement measure from libcon scores + others
########################
# entirely consistent w/ AMCE

auR=c(mean(candAgreeR[,grep(paired.covs.R,pattern='i_righttochoose')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_unbornlives')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_guncontrol')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_gunrights')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_lgbt')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_marriage')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_raisetaxes')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_cuttaxes')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_need')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_govabuse')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_freetrade')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_unfairtrade')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_citizenship')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_bordersecurity')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_reducemilitary')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_strengthenmilitary')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_policing')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_criminals')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_co2emissions')]),
	mean(candAgreeR[,grep(paired.covs.R,pattern='i_drilling')])
)

auD=c(mean(candAgreeD[,grep(paired.covs.D,pattern='i_righttochoose')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_unbornlives')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_guncontrol')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_gunrights')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_lgbt')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_marriage')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_raisetaxes')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_cuttaxes')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_need')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_govabuse')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_freetrade')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_unfairtrade')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_citizenship')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_bordersecurity')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_reducemilitary')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_strengthenmilitary')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_policing')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_criminals')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_co2emissions')]),
	mean(candAgreeD[,grep(paired.covs.D,pattern='i_drilling')])
)

cov.list[order(auD)]
cov.list[order(auR)]


###########################
#STEP 7: replicate agreement x consistency analysis;
#		- simulate all unique 2-way races where candidates face off against each other and themselves
#   - produce rates of support for each respondent, then average over these for each level of consistency x agreement
###########################

########################################################################
########################################################################
# agreement / consistency analysis
########################################################################
########################################################################

########################################################################
# generate all unique 2-way races from above for each respondent, candidates can face *themselves*
########################################################################
# idea is every voter gets all possible comparions, then votes for higher expected support
# estimate support as a function of agreement + consistency
e0=0
for(i in 1:129){
	for(j in (i):129){
		e0=e0+1
	}
}
# 8385 races
# have consistency measure above
suppR_B=suppR_A=matrix(NA,nrow(candPredictR),e0)
agreR_B=agreR_A=matrix(NA,nrow(candPredictR),e0)
consR_B=consR_A=matrix(NA,nrow(candPredictR),e0)
# have consistency measure above
suppD_B=suppD_A=matrix(NA,nrow(candPredictD),e0)
agreD_B=agreD_A=matrix(NA,nrow(candPredictD),e0)
consD_B=consD_A=matrix(NA,nrow(candPredictD),e0)

e0=0
for(i in 1:129){
	for(j in (i):129){
		e0=e0+1
		# R here
		suppR_A[,e0]=as.numeric(candPredictR[,i]>candPredictR[,j])
		suppR_B[,e0]=as.numeric(candPredictR[,i]<candPredictR[,j])
		suppR_A[which(candPredictR[,i]==candPredictR[,j]),e0]=suppR_B[which(candPredictR[,i]==candPredictR[,j]),e0]=as.numeric(candPredictR[,i]==candPredictR[,j])/2

		agreR_A[,e0]=candAgreeR[,i]
		agreR_B[,e0]=candAgreeR[,j]

		consR_A[,e0]=consistent.covs.R[i]
		consR_B[,e0]=consistent.covs.R[j]

		# D here
		suppD_A[,e0]=as.numeric(candPredictD[,i]>candPredictD[,j])
		suppD_B[,e0]=as.numeric(candPredictD[,i]<candPredictD[,j])
		suppD_A[which(candPredictD[,i]==candPredictD[,j]),e0]=suppD_B[which(candPredictD[,i]==candPredictD[,j]),e0]=as.numeric(candPredictD[,i]==candPredictD[,j])/2

		agreD_A[,e0]=candAgreeD[,i]
		agreD_B[,e0]=candAgreeD[,j]

		consD_A[,e0]=consistent.covs.D[i]
		consD_B[,e0]=consistent.covs.D[j]
	}
}

y=c(c(suppR_A),c(suppR_B),c(suppD_A),c(suppD_B))
s=c(c(consR_A),c(consR_B),c(consD_A),c(consD_B))
a=c(c(agreR_A),c(agreR_B),c(agreD_A),c(agreD_B))

# table
tab1=cbind(
	c(mean(y[which(a==2 & s==2)]),
  mean(y[which(a==2 & s==1)]),
  mean(y[which(a==2 & s==0)])),
	c(mean(y[which(a==1 & s==2)]),
  mean(y[which(a==1 & s==1)]),
  mean(y[which(a==1 & s==0)])),
	c(mean(y[which(a==0 & s==2)]),
  mean(y[which(a==0 & s==1)]),
  mean(y[which(a==0 & s==0)]))
)



# frequency of mis-classifications across issues
id=which(dems_synth$pid==-1)
ir=which(reps_synth$pid==1)
dems_synth$xmu=
c(as.numeric(dems_synth$libcon_envs[id]>0)+
as.numeric(dems_synth$libcon_crme[id]>0)+
as.numeric(dems_synth$libcon_abrt[id]>0)+
as.numeric(dems_synth$libcon_gays[id]>0)+
as.numeric(dems_synth$libcon_immi[id]>0)+
as.numeric(dems_synth$libcon_trad[id]>0)+
as.numeric(dems_synth$libcon_guns[id]>0)+
as.numeric(dems_synth$libcon_dfns[id]>0)+
as.numeric(dems_synth$libcon_taxs[id]>0)+
as.numeric(dems_synth$libcon_need[id]>0))

reps_synth$xmu=
c(as.numeric(reps_synth$libcon_envs[ir]<0)+
as.numeric(reps_synth$libcon_crme[ir]<0)+
as.numeric(reps_synth$libcon_abrt[ir]<0)+
as.numeric(reps_synth$libcon_gays[ir]<0)+
as.numeric(reps_synth$libcon_immi[ir]<0)+
as.numeric(reps_synth$libcon_trad[ir]<0)+
as.numeric(reps_synth$libcon_guns[ir]<0)+
as.numeric(reps_synth$libcon_dfns[ir]<0)+
as.numeric(reps_synth$libcon_taxs[ir]<0)+
as.numeric(reps_synth$libcon_need[ir]<0))

dems_synth$ymu=
c(as.numeric(dems_synth$libcon_envs[id]<0)+
as.numeric(dems_synth$libcon_crme[id]<0)+
as.numeric(dems_synth$libcon_abrt[id]<0)+
as.numeric(dems_synth$libcon_gays[id]<0)+
as.numeric(dems_synth$libcon_immi[id]<0)+
as.numeric(dems_synth$libcon_trad[id]<0)+
as.numeric(dems_synth$libcon_guns[id]<0)+
as.numeric(dems_synth$libcon_dfns[id]<0)+
as.numeric(dems_synth$libcon_taxs[id]<0)+
as.numeric(dems_synth$libcon_need[id]<0))

reps_synth$ymu=
c(as.numeric(reps_synth$libcon_envs[ir]>0)+
as.numeric(reps_synth$libcon_crme[ir]>0)+
as.numeric(reps_synth$libcon_abrt[ir]>0)+
as.numeric(reps_synth$libcon_gays[ir]>0)+
as.numeric(reps_synth$libcon_immi[ir]>0)+
as.numeric(reps_synth$libcon_trad[ir]>0)+
as.numeric(reps_synth$libcon_guns[ir]>0)+
as.numeric(reps_synth$libcon_dfns[ir]>0)+
as.numeric(reps_synth$libcon_taxs[ir]>0)+
as.numeric(reps_synth$libcon_need[ir]>0))

reps_synth$knowledge=(reps_synth$know_senate1+
reps_synth$know_senate2+
reps_synth$know_gov+
reps_synth$us_house_majority+
reps_synth$us_senate_majority+
reps_synth$self_place_attempt+
reps_synth$know_D_left_R)

dems_synth$knowledge=(dems_synth$know_senate1+
dems_synth$know_senate2+
dems_synth$know_gov+
dems_synth$us_house_majority+
dems_synth$us_senate_majority+
dems_synth$self_place_attempt+
dems_synth$know_D_left_R)


y=c(c(suppR_A),c(suppR_B),c(suppD_A),c(suppD_B))
s=c(c(consR_A),c(consR_B),c(consD_A),c(consD_B))
a=c(c(agreR_A),c(agreR_B),c(agreD_A),c(agreD_B))
k=c(rep(reps_synth$knowledge, 8385),rep(reps_synth$knowledge, 8385),rep(dems_synth$knowledge, 8385),rep(dems_synth$knowledge, 8385))
m=c(rep(reps_synth$ymu,8385),rep(reps_synth$ymu,8385),rep(dems_synth$ymu,8385),rep(dems_synth$ymu,8385))

save(y,s,a,k,m,file='data/AgreeConsistentSupportImpute.Rdata')

rm(list=ls()[which(ls()!='dirs')])

###########################################
###Then, produce models w/ Clustered SEs
library(xtable)
#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
clse.f <- function(dat,fm, cluster){
 require(sandwich)
 require(lmtest)
 not <- attr(fm$model,"na.action")
if( ! is.null(not)){
  cluster <- cluster[-not]
    dat <- dat[-not,]
}
 with(dat,{
 M <- length(unique(cluster))
 N <- length(cluster)
 K <- fm$rank #doF
 #dfc <- (M/(M-1))*((N-1)/(N-K))
 dfc <- ((N-1)/(N-K))
 # estfun is :: residuals(fm) * X
 # summing estimating function to the cluster || i.e., summing residuals to the cluster level  over units in cluster
 uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
 vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
 coeftest(fm, vcovCL)
 }
 )
}

load('data/AgreeConsistentSupportImpute.Rdata')
#load('data/issueScorePredictMatricesIns.Rdata')
library(stringr)
id_tp=paste(s,a,k,m,sep='_')
muV=tapply(y,id_tp,mean)
idx=c(rep(1:856,8385),rep(1:856,8385),rep(856+1:1415,8385),rep(856+1:1415,8385))
dats=as.data.frame(cbind(idx,y,a,s,m,k))
dats$idx=as.character(dats$idx)
s0=as.numeric(str_sub(names(muV),1,1))
a0=as.numeric(str_sub(names(muV),3,3))
k0=as.numeric(str_sub(names(muV),5,5))
m0=as.numeric(str_sub(names(muV),7))
modLL=(lm(y~a*s*k,subset=which(m>3 & m<8)))
modLL_CL <- clse.f(dats[which(m>3 & m<8),],modLL,idx[which(m>3 & m<8)])
#modLL_SL = summary(modLL)

#xtable(modLL_CL)
xtable(summary(lm(muV~a0*s0*k0,subset=which(m0>3 & m0<8))))

print(xtable(summary(lm(muV~a0*s0,subset=which(k0>5 & m0>1 & m0<9)))),file='appendix/figures/conditional_cross_6_7.tex')
print(xtable(summary(lm(muV~a0*s0,subset=which(k0<6 & k0>2 & m0>1 & m0<9)))),file='appendix/figures/conditional_cross_3_5.tex')
print(xtable(summary(lm(muV~a0*s0,subset=which(k0<3 & m0>1 & m0<9)))),file='appendix/figures/conditional_cross_0_2.tex')


#modLL=(lm(y~a*s*k,subset=which(m>2 & m<9)))
#modLL_CL <- clse.f(dats[which(m>2 & m<9),],modLL,idx[which(m>2 & m<9)])
#modLL_SL = summary(modLL)

#xtable(modLL_CL)
#xtable(summary(lm(muV~a0*s0*k0,subset=which(m0>2 & m0<9))))
